In this notebook, we will annotate the cells in our tonsil atlas (level 1) using the markers we found for each cluster. Importantly, we will split or merge different clusters to group cells into biologically sound cell types and states.
library(Seurat)
library(SeuratWrappers)
library(harmony)
library(tidyverse)
# Paths
path_to_obj <- here::here("scRNA-seq/results/R_objects/tonsil_rna_integrated_clustered_level_1.rds")
path_to_markers <- here::here("scRNA-seq/3-clustering/1-level_1/tmp/markers_level_1")
path_to_save <- here::here("scRNA-seq/results/R_objects/tonsil_rna_integrated_annotated_level_1.rds")
path_to_save_df <- here::here("scRNA-seq/3-clustering/1-level_1/tmp/annotation_level_1_multiome.rds")
# Functions
source(here::here("scRNA-seq/bin/utils.R"))
# Seurat object
tonsil <- readRDS(path_to_obj)
tonsil
## An object of class Seurat
## 37378 features across 362395 samples within 1 assay
## Active assay: RNA (37378 features, 0 variable features)
## 3 dimensional reductions calculated: pca, umap, harmony
# Markers
markers_rds <- list.files(path_to_markers)
markers_paths <- str_c(path_to_markers, markers_rds, sep = "/")
markers_dfs <- purrr::map(markers_paths, readRDS)
names(markers_dfs) <- markers_rds %>%
str_remove("^markers_cluster_") %>%
str_remove("_level_1.rds")
This is the current clustering:
table(tonsil$seurat_clusters) / ncol(tonsil) * 100
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12
## 31.0305054 23.0375695 16.1103216 13.8357317 3.5651706 3.5063950 3.4547938 1.6854537 1.3692242 0.9241297 0.8427269 0.3785924 0.2593855
colors <- c("#5A5156", "#E4E1E3", "#F6222E", "#FE00FA", "#16FF32", "#3283FE",
"#FEAF16", "#B00068", "#1CFFCE", "#90AD1C", "#2ED9FF", "#DEA0FD",
"#AA0DFE")
p <- DimPlot(
tonsil,
group.by = "seurat_clusters",
pt.size = 0.1,
cols = colors
)
p
# Stratified by age
umap_age <- plot_split_umap(tonsil, "age_group")
umap_age
sorted_names <- names(markers_dfs) %>%
as.numeric() %>%
sort() %>%
as.character()
markers_dfs <- markers_dfs[sorted_names]
markers_dfs2 <- purrr::map(names(markers_dfs), function(cluster) {
df <- markers_dfs[[cluster]] %>%
rownames_to_column(var = "gene") %>%
filter(p_val_adj < 0.001 & avg_logFC > 0.75) %>%
mutate(cluster = cluster) %>%
arrange(desc(avg_logFC))
df
})
names(markers_dfs2) <- names(markers_dfs)
markers_df <- bind_rows(markers_dfs2)
DT::datatable(markers_df)
Notably, cluster 8 and 10 do not seem to have any distinctive marker:
DT::datatable(markers_dfs[["8"]])
DT::datatable(markers_dfs[["10"]])
We begin by visualizing well-known lineage-specific markers:
canonical_markers <- c("CD79A", "CD79B", "CD3D", "CD3E", "NKG7", "LYZ",
"IGHD", "IGHM", "IGHA1", "IGHG1", "FDCSP", "PTCRA",
"XBP1", "TOP2A", "KRT19", "SPRR3", "DNTT", "VPREB1")
canonical_markers_gg <- purrr::map(canonical_markers, function(x) {
p <- feature_plot_doublets(seurat_obj = tonsil, feature = x)
p
})
names(canonical_markers_gg) <- canonical_markers
canonical_markers_gg
## $CD79A
##
## $CD79B
##
## $CD3D
##
## $CD3E
##
## $NKG7
##
## $LYZ
##
## $IGHD
##
## $IGHM
##
## $IGHA1
##
## $IGHG1
##
## $FDCSP
##
## $PTCRA
##
## $XBP1
##
## $TOP2A
##
## $KRT19
##
## $SPRR3
##
## $DNTT
##
## $VPREB1
Let us now visualize a subset of top markers for each cluster:
selected_markers <- list(
"0" = c("BANK1", "FCER2", "MARCH1", "CD83"),
"1" = c("CD3D", "IL7R", "CD2", "GIMAP7"),
"2" = c("TOP2A", "MKI67", "CENPF", "HMGB1"),
"3" = c("MARCKSL1", "RGS13", "CCDC88A", "LMO2"),
"4" = c("GNLY", "NKG7", "GZMK", "CD8A"),
"5" = c("FCRL4", "FCRL5", "PLAC8", "SOX5"),
"6" = c("IGHG1", "IGHA1", "JCHAIN", "XBP1"),
"7" = c("LYZ", "S100A8", "APOE", "AIF1"),
"8" = c("TXNIP", "RPS10", "TRBC2", "TCF7"),
"9" = c("FDCSP", "CLU", "CXCL13", "CR2"),
"10" = c("MT2A", "CD3D", "TRAC", "PCNA"),
"11" = c("PTPRCAP", "CD37", "HSPA1B", "CD74"),
"12" = c("PTCRA", "LILRA4", "CLECL4C", "IRF7")
)
purrr::map(selected_markers, function(l) {
FeaturePlot(tonsil, features = l, ncol = 2, pt.size = 0.1)
})
## $`0`
##
## $`1`
##
## $`2`
##
## $`3`
##
## $`4`
##
## $`5`
##
## $`6`